---
title: "Cleaning ethnic and linguistic fractionalization"
---

Sources:
https://scholar.harvard.edu/files/alesina/files/fractionalization.pdf
The values were transcribed from the table at the link below, after checking that it matched the source above:
https://en.wikipedia.org/wiki/List_of_countries_ranked_by_ethnic_and_cultural_diversity_level

# Load

```{r}
# Run the project's package-loading helper script.
  source("helper-packages.R")

# Read the raw fractionalization workbook into `elf_raw`.
# import() is not defined in this file; presumably it is supplied by the
# packages attached above (e.g. rio) -- confirm.
# NOTE(review): the path points at the religious-fractionalization workbook,
# while the cleaning step reads its `ethnic_frac` and `linguistic_frac`
# columns -- confirm this is the intended input file.
  elf_raw <- import(
    "../raw-data/x-relig-fractionalization/religious_fractionalization_index.xlsx"
  )
```

# Clean 

```{r}
# Build `elf_clean`: one row per recognised country with ethnic (ef_) and
# linguistic (lf_) fractionalization measures.
#
# Resulting columns (all prefixed with "adekw_"):
#   country_common      standardised country name from countryname()
#   ef_/lf_original     the raw score coerced to numeric
#   ef_/lf_median       the sample median of that score, repeated on every row
#   ef_/lf_above_median 1 if the score is >= the median, 0 if below,
#                       NA if the score is missing
#
# NOTE(review): the medians are computed over every row left after the
# "Gaza Strip" filter, i.e. before rows whose name countryname() cannot match
# are dropped at the end of the pipeline -- confirm that is intended.
  elf_clean <-
    elf_raw %>%
    # keep main part of palestine only
    filter(!str_detect(country, "Gaza Strip")) %>%
    mutate(
      country_common = countryname(str_trim(country)),

      # as.numeric() turns any non-numeric entry into NA; the medians below
      # therefore ignore missing values explicitly.
      ef_original = as.numeric(ethnic_frac),
      ef_median = median(ef_original, na.rm = TRUE),
      # reuse the median column created on the previous line
      ef_above_median = as.numeric(ef_original >= ef_median),

      lf_original = as.numeric(linguistic_frac),
      lf_median = median(lf_original, na.rm = TRUE),
      lf_above_median = as.numeric(lf_original >= lf_median)
    ) %>%
    select(country_common, starts_with("ef_"), starts_with("lf_")) %>%
    # prefix every column so the variables stay identifiable by source
    rename_with(~ paste("adekw", .x, sep = "_")) %>%
    # drop rows whose country name could not be standardised
    filter(!is.na(adekw_country_common))
```

# Save data

```{r}
  # Write the cleaned table to disk as an RDS file.
  saveRDS(
    object = elf_clean,
    file = "../cleaned-data/x-13-ethlang-fractionalization.rds"
  )
```
